library(ggplot2)
library(tidyverse)
library(GGally)
library(ggpubr)

#load the dataframes for the figure

remove cells in the edges : pos.x > 45 & < 1150 pos.y > 45 & < 1000 ##add the replicate info


aic.df <- aic.df %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

aic.df <- aic.df %>% 
  mutate(colony = ifelse(degron == "stable" & red == "pup1-rfp", 
                         case_when(exp.field %in% c("20min_s4", "20min_s5") ~ "Replicate 1",
                                   exp.field %in% c("20min_s6", "20min_s7") ~ "Replicate 2",
                                   exp.field %in% c("20min_s8", "20min_s9") ~ "Replicate 3"), colony)) 
  # filter(value < 0.05)
  

##Cellular attributes

pup1.cell.attr <- read_csv("~/plots/all_data/all_pup1_cell_attr.csv")
Rows: 19396 Columns: 29
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (4): cell.id, degron, red, treatment
dbl (25): gfp.mean.bg.af.sub.new, gfp.sum.bg.af.sub, area, area.puncta, BB_B, BB_C, Elip_B, Elip_C, no.of.voxels, pos.x, pos.y, spheracity, volume, rfp.mean...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

###pup1-RFP background

pup1.cell.attr <- pup1.cell.attr %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

pup1.cell.attr <- pup1.cell.attr %>% 
  mutate(colony = ifelse(degron == "stable" & red == "pup1-rfp", 
                         case_when(exp.field %in% c("20min_s4", "20min_s5") ~ "Replicate 1",
                                   exp.field %in% c("20min_s6", "20min_s7") ~ "Replicate 2",
                                   exp.field %in% c("20min_s8", "20min_s9") ~ "Replicate 3"), colony)) 
  
#removing the 40min and 60min experiments from the stable gfp experiments
pup1.cell.attr <- pup1.cell.attr %>%
  mutate(exp = str_split(exp.field, "_", simplify = T)[,1]) %>% 
  filter(exp == "20min")

Adding the gfp.sum column for the stable GFP experiment

pup1.cell.attr <- read_csv("~/plots/pup1-rfp-gfp-decay/4-28-21-8hrGFP/data/gfp_stable_filtered.csv") %>% 
  filter(image.no == 1) %>% 
  dplyr::select(cell.id, gfpSumBgAFsub) %>% 
  mutate(cell.id = paste0(cell.id,"_","stable_pup1-rfp_none")) %>% 
  dplyr::rename("gfp.sum.bg.af.sub" = "gfpSumBgAFsub") %>% 
  left_join(pup1.cell.attr %>% 
              filter(degron == "stable") %>% 
              dplyr::select(-gfp.sum.bg.af.sub),., by = "cell.id") %>% 
  bind_rows(pup1.cell.attr %>% 
              filter(degron != "stable"),.)

removing multiple entries of the cell because some cell have more than

multiple.pup1 <- pup1.cell.attr %>% 
  mutate(dapi.mean.bg.sub.puncta = ifelse(is.na(dapi.mean.bg.sub.puncta), 0, dapi.mean.bg.sub.puncta) , 
         rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0 , rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0) %>% 
  group_by(cell.id) %>% 
  tally() %>% 
  filter(n>1)

pup1.cell.attr<- pup1.cell.attr %>% 
  group_by(cell.id) %>% 
  mutate(rfp.mean.bg.sub.puncta = ifelse(cell.id %in% multiple.pup1$cell.id , mean(rfp.mean.bg.sub.puncta), rfp.mean.bg.sub.puncta),
         rfp.sum.bg.sub.puncta = ifelse(cell.id %in% multiple.pup1$cell.id, sum(rfp.mean.bg.sub.puncta), rfp.sum.bg.sub.puncta)) %>% 
    distinct(cell.id, .keep_all = TRUE) 
  

#proteasome inhibition experiments

protInhi.attr <- read_csv("~/plots/all_data/all_mg135_attr.csv") %>% 
  rename("cell.id" = "unique.trackID") %>% 
  filter(timepoint == 1) %>% 
  mutate(cell.id = paste0(cell.id ,"_",degron,"_",red,"_",treatment)) %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

multiple.proInh <- protInhi.attr %>% 
  mutate(dapi.mean.bg.sub.puncta = ifelse(is.na(dapi.mean.bg.sub.puncta), 0, dapi.mean.bg.sub.puncta) , 
         rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0 , rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0) %>% 
  group_by(cell.id) %>% 
  tally() %>% 
  filter(n>1)

protInhi.attr <- protInhi.attr %>% 
  group_by(cell.id) %>% 
  mutate(rfp.mean.bg.sub.puncta = ifelse(cell.id %in% multiple.proInh$cell.id , mean(rfp.mean.bg.sub.puncta), rfp.mean.bg.sub.puncta),
         rfp.sum.bg.sub.puncta = ifelse(cell.id %in% multiple.proInh$cell.id, sum(rfp.mean.bg.sub.puncta), rfp.sum.bg.sub.puncta)) %>% 
    distinct(cell.id, .keep_all = TRUE)  
  

Adding the protein inhibition experiment data

pup1.proInhi.attr <- pup1.cell.attr %>% 
  bind_rows(.,protInhi.attr %>% 
              dplyr::select(-time, 
                            -timepoint,
                            -image.no, 
                            -ln.gfp, 
                            -ln.gfp.dif,
                            -trackID,
                            -gfp.intensity.center,
                            -gfp.int.mean,
                            -gfp.int.median,
                           -gfp.int.sum,
                           -no.of.triangles,
                           -field,
                           -time.dif.gfp,
                           -real.time.gfp,
                           -sample,
                           -avg.gfp.bg,
                           -Min_gfp,
                           -gfp.mean.bg.sub,
                           -value,
                           -t80,
                           -t95,
                           -threshold,
                           -threshold_95,
                           -gfp.mean.bg.af.sub,
                           -threshold_80,
                           -Mean_gfp,
                           -image,
                           -experiment,
                           -gfp.sum.bg.sub))

#keeping the dy.dm model parameters

#only dy.dm
dy.pup1.rep1.mat <- aic.df %>% 
  filter(red == "pup1-rfp") %>% 
  filter(ifelse(degron %in% c("stable","stable.2","stable.3"), dy < 0.1, dy < 0.5)) %>% 
  group_by(cell.id) %>% 
  filter(model == "dy.dm") %>% 
  mutate(dm = ifelse(is.na(dm), Inf, dm)) %>%
  filter( dm > 0.000015)

#count the number of cells in each experiment

dy.pup1.rep1.mat %>% group_by(degron, treatment) %>% tally()

#creating the df for the MLR

mlr.df <- dy.pup1.rep1.mat %>% 
  #left join with the df with cellular attributes
  left_join(.,pup1.proInhi.attr, by = c("cell.id","red","treatment","degron", "exp.field","colony")) %>% 
  #remove cells with no rfp puncta info 
  mutate(rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0, rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0)

#checking how many cells there are in each experiment

mlr.df %>% group_by(degron, treatment) %>% tally()

#looking the placement of single cells in images

##Removing the columns that I wont regress upon

mlr.df <- mlr.df %>% 
  filter(pos.x > 45 & pos.x < 1150,
  pos.y > 45 & pos.y < 1000) %>%
  dplyr::select(-f, 
         -rfp.mean.af.sub.puncta,
         -rfp.sum.af.sub.puncta, 
         -dapi.sum.af.sub.puncta,
         -dapi.mean.af.sub.puncta,
         -model, 
         -k,
         -fevals,
         -gevals, 
         -niter, 
         -convcode, 
         -kkt1,
         -kkt2,
         -value,
         -xtime, 
         # -treatment,
         -rfp.mean.bg.sub,
         -rfp.sum.bg.sub,
         -dapi.mean.bg.sub,
         -dapi.sum.bg.sub,
         -dapi.mean.bg.sub.puncta,
         -area.puncta,
         -BB_B,
         -BB_C,
         -pos.x,
         -pos.y,
         -gfp.sum.bg.af.sub,
         -rfp.sum.bg.sub.puncta,
         -no.of.voxels,
         -volume,
         -spheracity) %>% 
  mutate(shape = Elip_B/Elip_C, 
         t.half = log(2)/dy) %>%
  ungroup() %>%
  dplyr::select(-cell.id, 
                -red,
                -exp.field, 
                -aic,
                -Elip_B,
                -Elip_C, 
                -exp) 

#Regressors to use for MLR

regressors <- c("dm",
                "gfp.mean.bg.af.sub.new",
                # "gfp.sum.bg.af.sub",
                "area",
                # "no.of.voxels",
                # "spheracity",
                # "volume",
                "rfp.mean.bg.sub.puncta",
                # "rfp.sum.bg.sub.puncta",
                "dapi.sum.bg.sub.puncta",
                "shape")

Function to evaluate step R

stepR.fn <- function(reg, df, prt.inh.trt, id.var) {
  #get the estimates
  stepR.est <-
    lapply(reg, function(a) {
      #regressors to remove one by one
      temp.df2 <- df %>%
        dplyr::select(all_of(reg), 
                      degron,
                      t.half,
                      # dy, 
                      colony, 
                      treatment) %>% #removed t.half and replaced with dy
        ungroup() %>%
        dplyr::select(-a)
      
      temp.df2 %>%
        filter(treatment %in% prt.inh.trt) %>%
        split(.[id.var]) %>%
        map(., function(b) {
          lm(t.half ~ .,
             data = b %>% dplyr::select(-degron, -colony, -treatment)) %>%
            broom::tidy()
        }) %>%
        bind_rows(.id = id.var) %>%
        mutate(removed.col = a)
      
    }) %>%
    bind_rows()
  
  #get the stats of the model
  stepR.glance <- lapply(reg, function(a) {
    temp.df2 <- df %>%
      dplyr::select(all_of(reg), degron, t.half,
                    # dy, 
                    colony, treatment) %>% #removed t.half and replaced with dy
      dplyr::select(-a)
    
    temp.df2 %>%
      filter(treatment %in% prt.inh.trt) %>%
      split(.[id.var]) %>%
      map(., function(b) {
        lm(t.half ~ .,
           data = b %>% dplyr::select(-degron,-colony,-treatment)) %>%
          broom::glance()
      }) %>%
      bind_rows(.id = id.var) %>%
      mutate(removed.col = a)
    
  }) %>%
    bind_rows()
  
  #get the fits
  stepR.fit <- lapply(reg, function(a) {
    temp.df2 <- df %>%
      dplyr::select(all_of(reg), degron, t.half,
                    # dy, 
                    colony, treatment) %>% #removed t.half and replaced with dy
      dplyr::select(-a)
    
    temp.df2 %>%
      filter(treatment %in% prt.inh.trt) %>%
      split(.[id.var]) %>%
      map(., function(b) {
        lm(t.half ~ .,
           data = b %>% dplyr::select(-degron, -colony,-treatment))
      })
  })
  
  #output
  stepR.list <- list(stepR.est,
                     stepR.glance,
                     stepR.fit)
  
  names(stepR.list) <- c("stepR.est",
                         "stepR.glance",
                         "stepR.fit")
  return(stepR.list)
}

function to evaluate the full model

fullMLR.fn <- function(regressors, df, prt.inh.trt, id.var) {
  #statistics
  fullR.glance <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron, 
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony, -treatment)) %>%
        broom::glance()
    }) %>%
    bind_rows(.id = id.var) %>%
    mutate(removed.col = "none")
  
  #estimates
  fullR.est <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron,
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony,-treatment)) %>%
        broom::tidy()
    }) %>%
    bind_rows(.id = id.var) %>%
    mutate(removed.col = "none")
  
  #fits
  fullMLR.fit <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron, 
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony, -treatment))
    })
  
  #output
  fullMLR.list <- list(fullR.glance,
                       fullR.est,
                       fullMLR.fit)
  
  names(fullMLR.list) <- c("fullR.glance",
                           "fullR.est",
                           "fullMLR.fit")
  return(fullMLR.list)
}

#df where the two experiments of cln2 and stable-GFP are clubbed

mlr.df2 <- mlr.df %>% 
  filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "yeGFP-CLN2",
                            degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
                            degron %in% c("stable","stable.2","stable.3") ~ "yeGFP")) 
  

step MLR estimates #without the protein inhibition

stepR.list <- stepR.fn(reg = regressors, 
         df = mlr.df2,
         prt.inh.trt = "none",
        id.var = "degron")
Note: Using an external vector in selections is ambiguous.
ℹ Use `all_of(a)` instead of `a` to silence this message.
ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
fullR.list <- fullMLR.fn(reg = regressors,
                         df = mlr.df2,
                         prt.inh.trt = "none",
                         id.var = "degron")

#2D correlation plots

lapply(regressors[1], function(a){
  mlr.df %>% 
    filter(treatment != "none") %>% 
  mutate(t.half = log(2)/dy) %>%
    # filter(degron %in% c("stable.2","stable.3")) %>% 
    # filter(!(degron %in% c("mODC","cln2.2","cln2","stable"))) %>% 
    # filter(treatment %in% c("dmso1","dmso2")) %>%  %>% 
      ggplot(.,aes(y = t.half, x = .[[a]]))+
    geom_point()+
    stat_cor()+
    # stat_regline_equation()+
    facet_wrap(~treatment , scales = "free")+
    labs(title = a)+
    scale_x_log10()
  
})
[[1]]
Warning: Use of `.[[a]]` is discouraged. Use `.data[[a]]` instead.
Warning: Use of `.[[a]]` is discouraged. Use `.data[[a]]` instead.

#y~x + c single variable regression single regressor statistics

stepR.glance <- lapply(regressors, function(a){
  temp.df2 <- mlr.df %>% 
    ungroup() %>% 
    dplyr::select(-a, -dy)
  
  temp.df2 %>% 
    filter(treatment == "none") %>% 
    split(list(.$degron)) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron,-colony,-treatment)) %>% 
        broom::glance()
    }) %>% 
    bind_rows(.id = "degron") %>% 
    mutate(removed.col = a)
   
}) %>% 
  bind_rows()


stepR.glance.dmso <- lapply(regressors, function(a){ #regressors to remove one by one 
  temp.df2 <- mlr.df %>% 
    ungroup() %>% 
    dplyr::select(-a, -dy)
  
  temp.df2 %>% 
    filter(treatment %in% c("dmso1","dmso2")) %>% 
    split(list(.$degron, .$treatment)) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron, -colony, -treatment)) %>% 
        broom::glance()
    }) %>% 
    bind_rows(.id = "degron") %>% 
    mutate(removed.col = a)
   
}) %>% 
  bind_rows() 

single regressor estimates

stepR.1var.est <- lapply(regressors, function(a){
  temp.df2 <- temp.df %>% 
    ungroup() %>% 
    dplyr::select(a, t.half,degron)
  
  temp.df2 %>% 
    split(.$degron) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron)) %>% 
        broom::tidy()
    }) %>% 
    bind_rows(.id = "degron") %>% 
    mutate(single.col = a)
   
}) %>% 
  bind_rows()

#fits
stepR.1var.fit <- lapply(regressors, function(a){
  temp.df2 <- mlr.df %>% 
    ungroup() %>% 
    dplyr::select(a, t.half,degron)
  
  temp.df2 %>% 
    split(.$degron) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron)) 
    }) 
}) 

#testing for one degron

fullMLR.fit <- temp.df %>%
    ungroup() %>% 
    dplyr::select(all_of(regressors), degron,t.half,colony) %>% 
    split(list(.$degron)) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron,-colony)) 
    })
Error in `dplyr::select()`:
! Can't subset columns that don't exist.
✖ Column `gfp.sum.bg.af.sub` doesn't exist.
Backtrace:
  1. ... %>% ...
  5. dplyr:::select.data.frame(...)
  8. tidyselect::eval_select(expr(c(...)), .data)
  9. tidyselect:::eval_select_impl(...)
 18. tidyselect:::vars_select_eval(...)
     ...
 21. tidyselect:::reduce_sels(node, data_mask, context_mask, init = init)
 22. tidyselect:::walk_data_tree(new, data_mask, context_mask)
 23. tidyselect:::as_indices_sel_impl(...)
 24. tidyselect:::as_indices_impl(x, vars, call = call, strict = strict)
 25. tidyselect:::chr_as_locations(x, vars, call = call)

#plotting the difference in r2 (full model - dropout model)

deltaR2.plt.all <- bind_rows(fullR.list$fullR.glance, stepR.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   # filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  dplyr::select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("Rate of Maturation" = "dm", 
         "GFP" = "gfp.mean.bg.af.sub.new",
         "Area" = "area",
         "Pup1-tDimer2" = "rfp.mean.bg.sub.puncta",
         "DAPI" = "dapi.sum.bg.sub.puncta",
         "Shape" = "shape") %>% 
  pivot_longer(cols = 3:8) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  # filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  mutate(degron = factor(degron , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10))+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8), 
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 30))

deltaR2.plt.all

#transform dm and dapi intensity

stepR.list.trans <- stepR.fn(reg = regressors, 
         df = mlr.df2 %>% 
           mutate(dm = log10(dm)),
         prt.inh.trt = "none",
        id.var = "degron")

fullR.list.trans <- fullMLR.fn(reg = regressors,
                         df = mlr.df2 %>% 
           mutate(dm = log10(dm)),
                         prt.inh.trt = "none",
                         id.var = "degron")

#look at the sign of the estimates

lm(t.half ~ rfp.mean.bg.sub.puncta + dm + gfp.mean.bg.af.sub.new + dapi.sum.bg.sub.puncta, data = mlr.df2 %>% filter(degron == "GFP-mODC"))

Call:
lm(formula = t.half ~ rfp.mean.bg.sub.puncta + dm + gfp.mean.bg.af.sub.new + 
    dapi.sum.bg.sub.puncta, data = mlr.df2 %>% filter(degron == 
    "GFP-mODC"))

Coefficients:
           (Intercept)  rfp.mean.bg.sub.puncta                      dm  gfp.mean.bg.af.sub.new  dapi.sum.bg.sub.puncta  
             5.467e+00              -6.682e-03               6.376e-04               4.482e-03               1.647e-05  

#residual analysis of the models

#residual plots against the fitted values
fullMLR.fit %>% 
  map(.,autoplot)
$cln2

$cln2.2

$cln2.3

$cln2.4

$mODC

$mODC.2

$stable

$stable.2

$stable.3

Observations: from the residual vs fitted plots: For the degron GFPs the residual vs the fitted plot lies around the horizonttal band around 0. but for stable GFP, the residual plot is an open funnel shape. According to page 139 in the intro to linear regression: The patterns in panels b and c indicate that the variance of the errors is not constant. The outward-opening funnel pattern in panel b implies that the variance is an increasing function of y. From the residuals vs regressors plots: in yeGFP: there is a strong funnel shape with the GFP intensity: residuals is negatively related to the GFP intensity.

#t.half vs dm

mlr.df %>% 
  ggplot(.,aes(x = dm, y = dy))+
  geom_point()+
  facet_wrap(~degron, scales = "free")+
  scale_x_log10()

testing multiple colinearity using the full model method = variance inflation factors. vif > 5 for a regressor is problematic

library(car)

fullR.list$fullMLR.fit %>% map(.,vif)

#only the gfp, rfp, dapi regrrssors

regressors.fp <- c("gfp.mean.bg.af.sub.new",
                   "rfp.mean.bg.sub.puncta",
                   "dapi.sum.bg.sub.puncta")

stepR.fp.list <- stepR.fn(reg = regressors.fp, 
                          df = mlr.df2,
                          prt.inh.trt = "none",
                          id.var = "degron")

fullR.fp.list <- fullMLR.fn(regressors = regressors.fp, 
                          df = mlr.df2,
                          prt.inh.trt = "none",
                          id.var = "degron")

#variance inflation factor

vif(full.fp.fit$cln2.3) 
gfp.mean.bg.af.sub.new rfp.mean.bg.sub.puncta dapi.sum.bg.sub.puncta 
              1.085858               1.833896               1.928188 
vif(full.fp.fit$cln2.4) 
gfp.mean.bg.af.sub.new rfp.mean.bg.sub.puncta dapi.sum.bg.sub.puncta 
              1.024273               1.505917               1.477058 
vif(full.fp.fit$mODC.2) 
gfp.mean.bg.af.sub.new rfp.mean.bg.sub.puncta dapi.sum.bg.sub.puncta 
              1.001611               1.808745               1.807391 
vif(full.fp.fit$stable.3)
gfp.mean.bg.af.sub.new rfp.mean.bg.sub.puncta dapi.sum.bg.sub.puncta 
              1.008558               1.181293               1.172121 
vif(full.fp.fit$stable.2) 
gfp.mean.bg.af.sub.new rfp.mean.bg.sub.puncta dapi.sum.bg.sub.puncta 
              1.008374               1.427904               1.437518 

#delta r2 for only the fluorescent proteins as the regressors

deltaR2.plt <- bind_rows(fullR.fp.list$fullR.glance, stepR.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   # filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  dplyr::select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("GFP intensity" = "gfp.mean.bg.af.sub.new",
         "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
         "Total DAPI Int." = "dapi.sum.bg.sub.puncta") %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  # filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  mutate(degron = factor(degron , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))

deltaR2.plt

#response == dy instead of t.half

bind_rows(fullR.fp.list$fullR.glance, stepR.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("GFP intensity" = "gfp.mean.bg.af.sub.new",
         "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
         "Total DAPI Int." = "dapi.sum.bg.sub.puncta") %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
   filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  # mutate(degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))
Warning: Removed 1 rows containing missing values (position_stack).

#With the protein inhibitors

#with 6 regressors
stepR.prtInh.list <- stepR.fn(reg = regressors, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")
fullR.prtInh.list <- fullMLR.fn(reg = regressors, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")

#with just fluorecent proteins
stepR.prtInh.fp.list <- stepR.fn(reg = regressors.fp, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")
fullR.prtInh.fp.list <- fullMLR.fn(reg = regressors.fp, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")

plots

#with 6 regressors
bind_rows(fullR.prtInh.list$fullR.glance, stepR.prtInh.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  select(treatment, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  pivot_longer(cols = 3:8) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col()+
  facet_wrap(~factor(treatment, levels = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM")), scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("regressor dropped")

#with 3 fluorescent proteins
bind_rows(fullR.prtInh.fp.list$fullR.glance, stepR.prtInh.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  select(treatment, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col()+
  facet_wrap(~factor(treatment, levels = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM")), scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("regressor dropped")

#looked at the outliers

lapply(fullMLR.fit, function(a){
  cooks.distance(a) %>% 
    broom::tidy() %>% 
    arrange(desc(x))
}) 
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
$cln2

$cln2.2

$cln2.3

$cln2.4

$mODC

$mODC.2

$stable

$stable.2

$stable.3
NA
cooks.distance(fullMLR.fit$cln2.3) %>% broom::tidy() %>% arrange(desc(x))
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")
lapply(fullMLR.fit, summary)

#df to be used with step function

temp.df.list <- mlr.df2 %>% 
    filter(treatment == "none") %>%  
  # mutate( dm = log10(dm)) %>% 
  split(.$degron)

#using stepAIC from MASS

full.stepAIC.mass %>% map(.,broom::tidy)
$`GFP-CLN2`

$`GFP-mODC`

$yeGFP
NA

#step AIC with all the fp markers + cell shape

full.stepAIC <- lapply(temp.df.list, function(a){
  n <- nrow(a)
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", 
     trace = T,
     # k = log(n),
     k =2 ) #k = 2 when small dataset and k = log(n) where n = no. of observations for when the dataset is large
})
Note: Using an external vector in selections is ambiguous.
ℹ Use `all_of(regressors)` instead of `regressors` to silence this message.
ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
Start:  AIC=5023.53
t.half ~ dm + gfp.mean.bg.af.sub.new + area + rfp.mean.bg.sub.puncta + 
    dapi.sum.bg.sub.puncta + shape

                         Df Sum of Sq   RSS    AIC
- rfp.mean.bg.sub.puncta  1      32.7 36232 5023.0
- shape                   1      38.8 36239 5023.3
- area                    1      39.3 36239 5023.3
<none>                                36200 5023.5
- dm                      1     247.6 36447 5032.5
- dapi.sum.bg.sub.puncta  1     918.2 37118 5061.8
- gfp.mean.bg.af.sub.new  1    7805.7 44006 5335.7

Step:  AIC=5022.98
t.half ~ dm + gfp.mean.bg.af.sub.new + area + dapi.sum.bg.sub.puncta + 
    shape

                         Df Sum of Sq   RSS    AIC
- shape                   1      37.2 36270 5022.6
<none>                                36232 5023.0
- area                    1      89.4 36322 5024.9
- dm                      1     259.8 36492 5032.5
- dapi.sum.bg.sub.puncta  1     970.8 37203 5063.5
- gfp.mean.bg.af.sub.new  1    7803.3 44036 5334.8

Step:  AIC=5022.63
t.half ~ dm + gfp.mean.bg.af.sub.new + area + dapi.sum.bg.sub.puncta

                         Df Sum of Sq   RSS    AIC
<none>                                36270 5022.6
- area                    1      82.2 36352 5024.3
- dm                      1     249.7 36519 5031.7
- dapi.sum.bg.sub.puncta  1    1048.1 37318 5066.5
- gfp.mean.bg.af.sub.new  1    7848.3 44118 5335.8
Start:  AIC=1261.28
t.half ~ dm + gfp.mean.bg.af.sub.new + area + rfp.mean.bg.sub.puncta + 
    dapi.sum.bg.sub.puncta + shape

                         Df Sum of Sq    RSS    AIC
- shape                   1      1.67 3519.1 1260.1
- dm                      1      2.15 3519.6 1260.3
<none>                                3517.5 1261.3
- rfp.mean.bg.sub.puncta  1      5.47 3522.9 1261.8
- area                    1     85.84 3603.3 1299.2
- dapi.sum.bg.sub.puncta  1     89.54 3607.0 1300.9
- gfp.mean.bg.af.sub.new  1    479.97 3997.4 1471.2

Step:  AIC=1260.06
t.half ~ dm + gfp.mean.bg.af.sub.new + area + rfp.mean.bg.sub.puncta + 
    dapi.sum.bg.sub.puncta

                         Df Sum of Sq    RSS    AIC
- dm                      1      2.17 3521.3 1259.1
<none>                                3519.1 1260.1
- rfp.mean.bg.sub.puncta  1      5.05 3524.2 1260.4
- area                    1     84.64 3603.8 1297.4
- dapi.sum.bg.sub.puncta  1     87.90 3607.0 1298.9
- gfp.mean.bg.af.sub.new  1    482.37 4001.5 1470.9

Step:  AIC=1259.08
t.half ~ gfp.mean.bg.af.sub.new + area + rfp.mean.bg.sub.puncta + 
    dapi.sum.bg.sub.puncta

                         Df Sum of Sq    RSS    AIC
<none>                                3521.3 1259.1
- rfp.mean.bg.sub.puncta  1      5.13 3526.4 1259.5
- area                    1     85.97 3607.3 1297.0
- dapi.sum.bg.sub.puncta  1     87.30 3608.6 1297.7
- gfp.mean.bg.af.sub.new  1    493.27 4014.6 1474.3
Start:  AIC=922.36
t.half ~ dm + gfp.mean.bg.af.sub.new + area + rfp.mean.bg.sub.puncta + 
    dapi.sum.bg.sub.puncta + shape

                         Df Sum of Sq    RSS     AIC
<none>                                2628.7  922.36
- shape                   1      7.60 2636.3  924.18
- area                    1     49.53 2678.2  945.06
- rfp.mean.bg.sub.puncta  1     56.77 2685.5  948.63
- dapi.sum.bg.sub.puncta  1     93.01 2721.7  966.37
- dm                      1    105.69 2734.4  972.52
- gfp.mean.bg.af.sub.new  1    561.29 3190.0 1176.40
full.stepAIC %>% 
  map(., broom::glance) %>% 
  bind_rows(.id = "sample") 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3"))

full.stepAIC %>% 
  map(., broom::tidy) 
$yeGFP

$`yeGFP-CLN2`

$`yeGFP-mODC`
  # bind_rows(.id = "sample") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3"), 
         # p.value < 0.05) %>% 

full.stepAIC %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) 


full.stepAIC.fp %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) 
NA
NA

#Plot the aic difference at each step

full.stepAIC %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) %>% 
  select(degron, Step, del.aic) %>% 
  split(.$degron) %>% 
  map(.,function(a){
    a[nrow(a),] %>% 
      mutate(model = "reduced")
  }) %>% 
  bind_rows(.id = "degron") %>% 
  mutate(degron = factor(degron, levels = c("GFP-mODC", "GFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(x = del.aic, y = degron))+
  geom_col(width = 0.2)+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(AIC_{full} - AIC_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))

#step AIC with only the fluorescent markers

#looking at which regressor contributes to the overall variability 
full.stepAIC.fp %>% 
  map(., broom::tidy) 
$`GFP-CLN2`

$`GFP-mODC`

$yeGFP
NA

Variance in t-half explained by each regressor using anova

var.explained
$yeGFP

$`yeGFP-CLN2`

$`yeGFP-mODC`
NA

#plotting the variance explained

remain.reg <- data.frame(sample = c("yeGFP-CLN2","yeGFP-CLN2","yeGFP","yeGFP"), 
                         predictor = c("Rate of Maturation", "Shape","Shape","Pup1"), 
                         var.exp = rep(0,4),
                         value.Pr..F. = rep(0,4))
var.exp.plt <- var.explained %>% 
  bind_rows() %>% 
  bind_rows(.,remain.reg) %>% 
  filter(predictor != "Residuals") %>% 
  dplyr::select(sample, predictor, var.exp,value.Pr..F.) %>% 
  mutate(sample = factor(sample , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  # mutate(var.pal = ifelse(value.Pr..F. < 0.05, "<0.05",">0.05")) %>% 
  arrange(sample) %>% 
  ggplot(.,aes(x = predictor, y = var.exp ))+
  geom_col(width = 0.5)+
  facet_wrap(~sample, scales = "free")+
  scale_x_discrete(labels = function(x) str_wrap(x, width = 5))+
  theme_pubr()+
  theme(text = element_text(size = 8),
        axis.line = element_line(size = 0.1),
        strip.background = element_blank())+
  ylim(0,20)+
  ylab("Variance explained (%)")+
  xlab("Cellular features")
var.exp.plt

#stepAIC with proteasome inhibition experiments With all fp regressors

temp.df.list.dmso <- mlr.df %>% 
    filter(treatment != "none") %>%  
  split(.$treatment)

full.stepAIC.fp.dmso <- lapply(temp.df.list.dmso, function(a){
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", trace = T)
})

lapply(full.stepAIC.fp.dmso, summary) %>% map(., broom::tidy) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample)

lapply(full.stepAIC.fp.dmso, summary) %>% map(., broom::glance) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample)

with fp regressors + celluar shape

mlr.df %>% 
    filter(treatment != "none") %>%  
  split(.$treatment)
$`1uM`

$`2.5uM`

$`50uM`

$`5uM`

$dmso1

$dmso2
NA

Variance in t-half explained by each regressor

full.stepAIC.dmso %>% 
  map(., anova %>% as.data.frame) %>% 
  bind_rows(.id = "sample") %>% 
  rownames_to_column(var = "predictor") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  group_by(sample) %>% 
  mutate(var.exp = (value.Sum.Sq/sum(value.Sum.Sq))*100) %>% 
  split(.$sample)
$`1uM`

$`2.5uM`

$`50uM`

$`5uM`

$dmso1

$dmso2
NA

#using regsubsets

library(leaps)
full.regSub.fp <- lapply(temp.df.list, function(a){
  n <- nrow(a)
 regsubsets(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment), 
     nvmax = NULL)
})

regSub.summary <- full.regSub.fp %>% map(.,summary)

lapply(regSub.summary, function(a){
  data.frame(model = a$outmat, 
             cp = a$cp,
             bic = a$bic)
}) %>% bind_rows(.id = "degron") %>% 
  group_by(degron) %>% 
  arrange(degron, cp)
full.regSub <- lapply(temp.df.list, function(a){
  n <- nrow(a)
  
  
 regsubsets(t.half ~ ., data = a %>% 
     dplyr::select(regressors,
                   t.half,
                   -dy, 
                   -treatment), 
     nvmax = NULL)
})

regSub.summary2 <- full.regSub %>% map(.,summary)

lapply(regSub.summary2, function(a){
  data.frame(model = a$outmat, 
             cp = a$cp,
             bic = a$bic)
}) %>% bind_rows(.id = "degron") %>% 
  group_by(degron) %>% 
  arrange(degron, bic
          ) %>% split(.$degron)

#plots:

gfp.6.cor.plt <- mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  pivot_longer(cols = 2:7) %>%
  mutate(degron = case_when(
      degron %in% c("cln2.3", "cln2.2", "cln2", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
      degron %in% c("stable", "stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(degron ~ name, scales = "free", ncol = 4) +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Half-life [min.]")+
  xlab("A.U")
Note: Using an external vector in selections is ambiguous.
ℹ Use `all_of(regressors)` instead of `regressors` to silence this message.
ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This message is displayed once per session.
Warning: Ignoring unknown parameters: guides
  
gfp.6.cor.plt
`geom_smooth()` using formula 'y ~ x'

 mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors.fp,
         degron) %>%
  mutate(norm.rfp = rfp.mean.bg.sub.puncta/area) 
Error in `mutate()`:
! Problem while computing `norm.rfp = rfp.mean.bg.sub.puncta/area`.
Caused by error in `rfp.mean.bg.sub.puncta / area`:
! non-numeric argument to binary operator
Backtrace:
 1. ... %>% mutate(norm.rfp = rfp.mean.bg.sub.puncta / area)
 3. dplyr:::mutate.data.frame(., norm.rfp = rfp.mean.bg.sub.puncta/area)
 4. dplyr:::mutate_cols(.data, dplyr_quosures(...), caller_env = caller_env())
 6. mask$eval_all_mutate(quo)

GFP intensity vs t-half cor plot

gfp.Int.cor.plt
`geom_smooth()` using formula 'y ~ x'

#patchwork for 3 cellular features and t-half

cellular.halfLife.plt <- (deltaR2.plt.all/gfp.3.cor.plt)+
  plot_layout(guides = "collect", heights = c(0.3,0.7))+
  plot_annotation(tag_levels = "A")

cellular.halfLife.plt
ggsave(plot = cellular.halfLife.plt, filename = "cellular_halfLife_cor.png", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 8)
ggsave(plot = cellular.halfLife.plt, filename = "cellular_halfLife_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 8)

#patchwork for GFP intensity and t-half

gfpInt.halfLife.plt <- (deltaR2.plt.all/gfp.Int.cor.plt)+
  plot_layout(guides = "collect")+
  plot_annotation(tag_levels = "A")

gfpInt.halfLife.plt
`geom_smooth()` using formula 'y ~ x'

ggsave(plot = gfpInt.halfLife.plt, filename = "gfpInt_halfLife_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 5.5, height = 6)
`geom_smooth()` using formula 'y ~ x'

#for GRC poster

ggsave(plot =gfp.Int.cor.plt , filename = "gfpInt_cor.pdf",path = "~/plots/paper1/figures/fig_2/", width =12, height = 3)
`geom_smooth()` using formula 'y ~ x'

with prottein inhibition

 mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c ("mODC") ) %>%
  # filter(!(treatment %in% c("none", "dmso1","50uM")), 
         # ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>%  
  dplyr::select(t.half,
         regressors,
         degron,
         treatment) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DAPI" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  pivot_longer(cols = 2:7) %>%
  mutate(degron = case_when(
    degron %in% c("mODC")  ~ "yeGFP-mODC"),
    treatment = factor(treatment, levels = c("none","dmso1","dmso2","1uM","2.5uM","5uM","50uM"))) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_grid(treatment ~ name, scales = "free") +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none")+
  ylab("Half-Life [min.]")+
  xlab("A.U")
Warning: Ignoring unknown parameters: guides
`geom_smooth()` using formula 'y ~ x'

mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c ("mODC") ) %>%
  filter(!(treatment %in% c("none", "dmso1","50uM")), 
         ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  dplyr::select(t.half,
         regressors,
         degron,
         treatment) %>% 
  ggplot(.,aes(x = area, y = rfp.mean.bg.sub.puncta))+
  geom_point()+
  geom_smooth(method = "lm", se = F)+
  facet_wrap(~treatment)+
  stat_cor()
mlr.df %>% 
  filter(treatment == "none") %>% 
  ggplot(.,aes(x = dapi.sum.bg.sub.puncta))+
  geom_histogram()+
  facet_wrap(~degron, scales = "free")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#ggpairs

gfp.area.cor.ggPair
$`yeGFP-mODC`

 plot: [1,1] [====>----------------------------------------------------------------------------------]  6% est: 0s 
 plot: [1,2] [==========>----------------------------------------------------------------------------] 12% est: 1s 
 plot: [1,3] [===============>-----------------------------------------------------------------------] 19% est: 1s 
 plot: [1,4] [=====================>-----------------------------------------------------------------] 25% est: 1s 
 plot: [2,1] [==========================>------------------------------------------------------------] 31% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [2,2] [================================>------------------------------------------------------] 38% est: 1s 
 plot: [2,3] [=====================================>-------------------------------------------------] 44% est: 1s 
 plot: [2,4] [===========================================>-------------------------------------------] 50% est: 1s 
 plot: [3,1] [================================================>--------------------------------------] 56% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [3,2] [=====================================================>---------------------------------] 62% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [3,3] [===========================================================>---------------------------] 69% est: 0s 
 plot: [3,4] [================================================================>----------------------] 75% est: 0s 
 plot: [4,1] [======================================================================>----------------] 81% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,2] [===========================================================================>-----------] 88% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,3] [=================================================================================>-----] 94% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,4] [=======================================================================================]100% est: 0s 
                                                                                                                   

$`yeGFP-CLN2`

 plot: [1,1] [====>----------------------------------------------------------------------------------]  6% est: 0s 
 plot: [1,2] [==========>----------------------------------------------------------------------------] 12% est: 0s 
 plot: [1,3] [===============>-----------------------------------------------------------------------] 19% est: 1s 
 plot: [1,4] [=====================>-----------------------------------------------------------------] 25% est: 1s 
 plot: [2,1] [==========================>------------------------------------------------------------] 31% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [2,2] [================================>------------------------------------------------------] 38% est: 1s 
 plot: [2,3] [=====================================>-------------------------------------------------] 44% est: 1s 
 plot: [2,4] [===========================================>-------------------------------------------] 50% est: 1s 
 plot: [3,1] [================================================>--------------------------------------] 56% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [3,2] [=====================================================>---------------------------------] 62% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [3,3] [===========================================================>---------------------------] 69% est: 0s 
 plot: [3,4] [================================================================>----------------------] 75% est: 0s 
 plot: [4,1] [======================================================================>----------------] 81% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,2] [===========================================================================>-----------] 88% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,3] [=================================================================================>-----] 94% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,4] [=======================================================================================]100% est: 0s 
                                                                                                                   

$yeGFP

 plot: [1,1] [====>----------------------------------------------------------------------------------]  6% est: 0s 
 plot: [1,2] [==========>----------------------------------------------------------------------------] 12% est: 0s 
 plot: [1,3] [===============>-----------------------------------------------------------------------] 19% est: 1s 
 plot: [1,4] [=====================>-----------------------------------------------------------------] 25% est: 1s 
 plot: [2,1] [==========================>------------------------------------------------------------] 31% est: 1s `geom_smooth()` using formula 'y ~ x'

 plot: [2,2] [================================>------------------------------------------------------] 38% est: 1s 
 plot: [2,3] [=====================================>-------------------------------------------------] 44% est: 0s 
 plot: [2,4] [===========================================>-------------------------------------------] 50% est: 0s 
 plot: [3,1] [================================================>--------------------------------------] 56% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [3,2] [=====================================================>---------------------------------] 62% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [3,3] [===========================================================>---------------------------] 69% est: 0s 
 plot: [3,4] [================================================================>----------------------] 75% est: 0s 
 plot: [4,1] [======================================================================>----------------] 81% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,2] [===========================================================================>-----------] 88% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,3] [=================================================================================>-----] 94% est: 0s `geom_smooth()` using formula 'y ~ x'

 plot: [4,4] [=======================================================================================]100% est: 0s 
                                                                                                                   

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-mODC`, filename = "gfp_mODC_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-mODC`, filename = "gfp_mODC_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-CLN2`, filename = "gfp_cln2_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-CLN2`, filename = "gfp_cln2_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP`, filename = "gfp_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP`, filename = "gfp_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)
test.ggP <- (wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$`yeGFP-mODC`))|wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$`yeGFP-CLN2`))| wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$yeGFP)))+
 plot_annotation(tag_levels = "A")
ggsave(plot = test.ggP, filename = "test_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 3)

#area vs pup1 vs dna vs gfp

df_corSup_fig <- mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  dplyr::select(Area, DNA, `Pup1-tDimer`,GFP,degron ) %>% 
  mutate(
    degron = case_when(
      degron %in% c("cln2.3", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2")  ~ "yeGFP-mODC",
      degron %in% c("stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) 

area.pup1 <- df_corSup_fig %>% 
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = Area, y = `Pup1-tDimer`))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Pup1-tDimer [A.U]")+
  xlab("Area [A.U]")
Warning: Ignoring unknown parameters: guides
dna.pup1 <- df_corSup_fig %>% 
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = DNA, y = `Pup1-tDimer`))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Pup1-tDimer [A.U]")+
  xlab("DNA [A.U]")
Warning: Ignoring unknown parameters: guides
dna.area <- df_corSup_fig %>%
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = DNA, y = Area))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Area [A.U]")+
  xlab("DNA [A.U]")
Warning: Ignoring unknown parameters: guides
gfpInt.cor.plt <- df_corSup_fig %>% 
  pivot_longer(cols = 1:3) %>% 
  # filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = value, y = GFP))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
  facet_wrap(degron~name, scales = "free")+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  xlab("A.U")
Warning: Ignoring unknown parameters: guides
ggsave(plot = test_cor, filename = "area.pup1.dna.gfp_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 7)
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'

#Partial correlations ppcor

#partial correlations
ppCor.list <- mlr.df2 %>% 
  filter(treatment == "none") %>% split(.$degron) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area,  dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
    ppcor::pcor(a)
  })

#correlations
cor.all <- mlr.df2 %>% 
  filter(treatment == "none") %>%  
  split(.$degron) %>% 
  map(.,function(a){
     a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta,rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
     cor(a)
  })
ppCor.df <- ppCor.list %>% 
  map(.,function(a){
    a$estimate %>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor) %>% 
      left_join(.,a$p.value%>% 
                  as.data.frame() %>% 
                  rownames_to_column(var = "regressor") %>% 
                  rename("p.value" = "t.half") %>% 
      dplyr::select(p.value, regressor), by = "regressor")
  }) %>% 
  bind_rows(.id = "degron")

single.core.df <- cor.all %>% 
  map(.,function(a){
    a%>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor)
  }) %>% 
  bind_rows(.id = "degron") %>% 
  rename("single.core"="t.half")
library(gtools)
ppCoor.fig <- ppCor.df %>% 
  left_join(.,single.core.df, by = c("degron","regressor")) %>% 
  rename("Partial-Pariwise"="t.half",
         "Standard-Pariwise" = "single.core") %>% 
  pivot_longer(cols = c("Partial-Pariwise","Standard-Pariwise")) %>% 
   mutate(regressor = case_when(regressor == "gfp.mean.bg.af.sub.new" ~ "GFP" ,
                               regressor == "area" ~ "Area",
                               regressor == "rfp.mean.bg.sub.puncta" ~ "Pup1",
                               regressor == "dapi.sum.bg.sub.puncta" ~ "DNA" ,
                               regressor == "t.half" ~ "half-life"), 
          degron = factor(degron, levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP")), 
          name = factor(name , levels = c("Standard-Pariwise", "Partial-Pariwise")), 
          sig = stars.pval(p.value), 
          sig = ifelse(sig == " ", "NS", sig)) %>% 
  # filter(p.value < 0.05) %>% 
  filter(regressor != "half-life") %>% 
  ggplot(.,aes(x = regressor , y = value, fill = name))+
  geom_col(width = 0.5, position = "dodge")+
  facet_wrap(~degron)+
  geom_hline(yintercept = 0, size = 0.1)+
  scale_y_continuous(breaks = seq(-0.5,0.5, by = 0.1))+
  theme_pubr()+
  theme(text = element_text(size = 8),
            legend.key.size = unit(4,"mm"),
        legend.position = "top", 
        axis.line = element_line(size = 0.1),
        strip.background = element_blank())+
  scale_fill_manual(values = c("lightsteelblue4" ," steelblue4"), name = NULL)+
  ylab("Correlation with Half-life ")+
  xlab("Cellular features")

ppCoor.fig

#patchwork for 4 cellular features (area, gfp, dapi, pup1) and t-half

(gfp.6.cor.plt/var.exp.plt/ppCoor.fig)
`geom_smooth()` using formula 'y ~ x'

ggsave(plot = gfp.4cor.hf.ppCor.plt, filename = "gfp_4cor_halfLife_ppcor.png", path = "~/plots/paper1/figures/fig_2/", width = 7, height = 9)
`geom_smooth()` using formula 'y ~ x'
ggsave(plot = gfp.4cor.hf.ppCor.plt, filename = "gfp_4cor_halfLife_ppcor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 7, height = 9)
`geom_smooth()` using formula 'y ~ x'

#with protein inhibion

ppCor.PrtIn.list <- mlr.df %>% 
    filter(treatment != "none") %>%  
  filter(ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  split(.$treatment) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>%
      na.omit()
    ppcor::pcor(a)
  })

cor.PrtIn.all <- mlr.df %>% 
    filter(treatment != "none") %>%  
  filter(ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  split(.$treatment) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
    cor(a)
  })
ppCor.PrtIn.df <- ppCor.PrtIn.list %>% 
  map(.,function(a){
    a$estimate %>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor) %>% 
      left_join(.,a$p.value%>% 
                  as.data.frame() %>% 
                  rownames_to_column(var = "regressor") %>% 
                  rename("p.value" = "t.half") %>% 
      dplyr::select(p.value, regressor), by = "regressor")
  }) %>% 
  bind_rows(.id = "treatment")

single.core.Prtn.df <- cor.PrtIn.all %>% 
  map(.,function(a){
    a%>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor)
  }) %>% 
  bind_rows(.id = "treatment") %>% 
  rename("single.core"="t.half")

correlation of pup1 with mODC with treated with proteasome inhibitors

#when you normalize pup1 with the area

mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  mutate(norm.pup1 = `Pup1-tDimer`/Area) %>% 
  pivot_longer(cols = c(2:7,9)) %>%
  mutate(degron = case_when(
      degron %in% c("cln2.3", "cln2.2", "cln2", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
      degron %in% c("stable", "stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(degron ~ name, scales = "free", ncol = 4) +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none")+
  ylab("Half-Life [min.]")+
  xlab("A.U")
---
title: "Correlations"
output: html_notebook
---
```{r}
library(ggplot2)
library(tidyverse)
library(GGally)
library(ggpubr)
library(latex2exp)
library(patchwork)

```

#load the dataframes for the figure
```{r}
aic.df <- read_csv("~/plots/all_data/aic.csv")

aic.df %>% filter(treatment == "5uM", model == "dy.dm") %>% mutate(t.half = log(2)/dy) %>% filter(t.half > 130)

```


remove cells in the edges :
pos.x > 45 & < 1150
pos.y > 45 & < 1000
##add the replicate info 
```{r}
aic.df <- aic.df %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

aic.df <- aic.df %>% 
  mutate(colony = ifelse(degron == "stable" & red == "pup1-rfp", 
                         case_when(exp.field %in% c("20min_s4", "20min_s5") ~ "Replicate 1",
                                   exp.field %in% c("20min_s6", "20min_s7") ~ "Replicate 2",
                                   exp.field %in% c("20min_s8", "20min_s9") ~ "Replicate 3"), colony)) 
  
```

##Cellular attributes
```{r}
pup1.cell.attr <- read_csv("~/plots/all_data/all_pup1_cell_attr.csv")
```

###pup1-RFP background
```{r}
pup1.cell.attr <- pup1.cell.attr %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

pup1.cell.attr <- pup1.cell.attr %>% 
  mutate(colony = ifelse(degron == "stable" & red == "pup1-rfp", 
                         case_when(exp.field %in% c("20min_s4", "20min_s5") ~ "Replicate 1",
                                   exp.field %in% c("20min_s6", "20min_s7") ~ "Replicate 2",
                                   exp.field %in% c("20min_s8", "20min_s9") ~ "Replicate 3"), colony)) 
  
```

```{r}
#removing the 40min and 60min experiments from the stable gfp experiments
pup1.cell.attr <- pup1.cell.attr %>%
  mutate(exp = str_split(exp.field, "_", simplify = T)[,1]) %>% 
  filter(exp == "20min")
```

Adding the gfp.sum column for the stable GFP experiment
```{r}
pup1.cell.attr <- read_csv("~/plots/pup1-rfp-gfp-decay/4-28-21-8hrGFP/data/gfp_stable_filtered.csv") %>% 
  filter(image.no == 1) %>% 
  dplyr::select(cell.id, gfpSumBgAFsub) %>% 
  mutate(cell.id = paste0(cell.id,"_","stable_pup1-rfp_none")) %>% 
  dplyr::rename("gfp.sum.bg.af.sub" = "gfpSumBgAFsub") %>% 
  left_join(pup1.cell.attr %>% 
              filter(degron == "stable") %>% 
              dplyr::select(-gfp.sum.bg.af.sub),., by = "cell.id") %>% 
  bind_rows(pup1.cell.attr %>% 
              filter(degron != "stable"),.)
```

removing multiple entries of the cell because some cell have more than 
```{r}
multiple.pup1 <- pup1.cell.attr %>% 
  mutate(dapi.mean.bg.sub.puncta = ifelse(is.na(dapi.mean.bg.sub.puncta), 0, dapi.mean.bg.sub.puncta) , 
         rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0 , rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0) %>% 
  group_by(cell.id) %>% 
  tally() %>% 
  filter(n>1)

pup1.cell.attr<- pup1.cell.attr %>% 
  group_by(cell.id) %>% 
  mutate(rfp.mean.bg.sub.puncta = ifelse(cell.id %in% multiple.pup1$cell.id , mean(rfp.mean.bg.sub.puncta), rfp.mean.bg.sub.puncta),
         rfp.sum.bg.sub.puncta = ifelse(cell.id %in% multiple.pup1$cell.id, sum(rfp.mean.bg.sub.puncta), rfp.sum.bg.sub.puncta)) %>% 
    distinct(cell.id, .keep_all = TRUE) 
  
```


#proteasome inhibition experiments
```{r}
protInhi.attr <- read_csv("~/plots/all_data/all_mg135_attr.csv") %>% 
  rename("cell.id" = "unique.trackID") %>% 
  filter(timepoint == 1) %>% 
  mutate(cell.id = paste0(cell.id ,"_",degron,"_",red,"_",treatment)) %>% 
  mutate(exp.field = paste0(str_split(cell.id, "_", simplify = T)[,2],"_",
                            str_split(cell.id, "_", simplify = T)[,3]),
    colony = case_when(exp.field %in% c("20min_s3", "20min_s4") ~ "Replicate 1", 
                            exp.field %in% c("20min_s5", "20min_s6") ~ "Replicate 2",
                            exp.field %in% c("20min_s7", "20min_s8" , "20min_s9") ~ "Replicate 3"))

multiple.proInh <- protInhi.attr %>% 
  mutate(dapi.mean.bg.sub.puncta = ifelse(is.na(dapi.mean.bg.sub.puncta), 0, dapi.mean.bg.sub.puncta) , 
         rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0 , rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0) %>% 
  group_by(cell.id) %>% 
  tally() %>% 
  filter(n>1)

protInhi.attr <- protInhi.attr %>% 
  group_by(cell.id) %>% 
  mutate(rfp.mean.bg.sub.puncta = ifelse(cell.id %in% multiple.proInh$cell.id , mean(rfp.mean.bg.sub.puncta), rfp.mean.bg.sub.puncta),
         rfp.sum.bg.sub.puncta = ifelse(cell.id %in% multiple.proInh$cell.id, sum(rfp.mean.bg.sub.puncta), rfp.sum.bg.sub.puncta)) %>% 
    distinct(cell.id, .keep_all = TRUE)  
  

```
Adding the protein inhibition experiment data
```{r}
pup1.proInhi.attr <- pup1.cell.attr %>% 
  bind_rows(.,protInhi.attr %>% 
              dplyr::select(-time, 
                            -timepoint,
                            -image.no, 
                            -ln.gfp, 
                            -ln.gfp.dif,
                            -trackID,
                            -gfp.intensity.center,
                            -gfp.int.mean,
                            -gfp.int.median,
                           -gfp.int.sum,
                           -no.of.triangles,
                           -field,
                           -time.dif.gfp,
                           -real.time.gfp,
                           -sample,
                           -avg.gfp.bg,
                           -Min_gfp,
                           -gfp.mean.bg.sub,
                           -value,
                           -t80,
                           -t95,
                           -threshold,
                           -threshold_95,
                           -gfp.mean.bg.af.sub,
                           -threshold_80,
                           -Mean_gfp,
                           -image,
                           -experiment,
                           -gfp.sum.bg.sub))
```


#keeping the dy.dm model parameters 
```{r}
#only dy.dm
dy.pup1.rep1.mat <- aic.df %>% 
  filter(red == "pup1-rfp") %>% 
  filter(ifelse(degron %in% c("stable","stable.2","stable.3"), dy < 0.1, dy < 0.5)) %>% 
  group_by(cell.id) %>% 
  filter(model == "dy.dm") %>% 
  mutate(dm = ifelse(is.na(dm), Inf, dm)) %>%
  filter( dm > 0.000015)
```

#count the number of cells in each experiment 
```{r}
dy.pup1.rep1.mat %>% group_by(degron, treatment) %>% tally()
```
#creating the df for the MLR
```{r}
mlr.df <- dy.pup1.rep1.mat %>% 
  #left join with the df with cellular attributes
  left_join(.,pup1.proInhi.attr, by = c("cell.id","red","treatment","degron", "exp.field","colony")) %>% 
  #remove cells with no rfp puncta info 
  mutate(rfp.mean.bg.sub.puncta = ifelse(is.na(rfp.mean.bg.sub.puncta), 0, rfp.mean.bg.sub.puncta)) %>% 
  filter(rfp.mean.bg.sub.puncta > 0)
```

#checking how many cells there are in each experiment 
```{r}
mlr.df %>% group_by(degron, treatment) %>% tally()
```
#looking the placement of single cells in images
```{r}
mlr.df %>% 
  filter()
  ggplot(.,aes(x = pos.x, y = pos.y))+
  geom_point()+
  facet_wrap(~degron)
```
##Removing the columns that I wont regress upon
```{r}
mlr.df <- mlr.df %>% 
  filter(pos.x > 45 & pos.x < 1150,
  pos.y > 45 & pos.y < 1000) %>%
  dplyr::select(-f, 
         -rfp.mean.af.sub.puncta,
         -rfp.sum.af.sub.puncta, 
         -dapi.sum.af.sub.puncta,
         -dapi.mean.af.sub.puncta,
         -model, 
         -k,
         -fevals,
         -gevals, 
         -niter, 
         -convcode, 
         -kkt1,
         -kkt2,
         -value,
         -xtime, 
         # -treatment,
         -rfp.mean.bg.sub,
         -rfp.sum.bg.sub,
         -dapi.mean.bg.sub,
         -dapi.sum.bg.sub,
         -dapi.mean.bg.sub.puncta,
         -area.puncta,
         -BB_B,
         -BB_C,
         -pos.x,
         -pos.y,
         -gfp.sum.bg.af.sub,
         -rfp.sum.bg.sub.puncta,
         -no.of.voxels,
         -volume,
         -spheracity) %>% 
  mutate(shape = Elip_B/Elip_C, 
         t.half = log(2)/dy) %>%
  ungroup() %>%
  dplyr::select(-cell.id, 
                -red,
                -exp.field, 
                -aic,
                -Elip_B,
                -Elip_C, 
                -exp) 
```

#Regressors to use for MLR
```{r}
regressors <- c("dm",
                "gfp.mean.bg.af.sub.new",
                # "gfp.sum.bg.af.sub",
                "area",
                # "no.of.voxels",
                # "spheracity",
                # "volume",
                "rfp.mean.bg.sub.puncta",
                # "rfp.sum.bg.sub.puncta",
                "dapi.sum.bg.sub.puncta",
                "shape")
```

Function to evaluate step R 
```{r}
stepR.fn <- function(reg, df, prt.inh.trt, id.var) {
  #get the estimates
  stepR.est <-
    lapply(reg, function(a) {
      #regressors to remove one by one
      temp.df2 <- df %>%
        dplyr::select(all_of(reg), 
                      degron,
                      t.half,
                      # dy, 
                      colony, 
                      treatment) %>% #removed t.half and replaced with dy
        ungroup() %>%
        dplyr::select(-a)
      
      temp.df2 %>%
        filter(treatment %in% prt.inh.trt) %>%
        split(.[id.var]) %>%
        map(., function(b) {
          lm(t.half ~ .,
             data = b %>% dplyr::select(-degron, -colony, -treatment)) %>%
            broom::tidy()
        }) %>%
        bind_rows(.id = id.var) %>%
        mutate(removed.col = a)
      
    }) %>%
    bind_rows()
  
  #get the stats of the model
  stepR.glance <- lapply(reg, function(a) {
    temp.df2 <- df %>%
      dplyr::select(all_of(reg), degron, t.half,
                    # dy, 
                    colony, treatment) %>% #removed t.half and replaced with dy
      dplyr::select(-a)
    
    temp.df2 %>%
      filter(treatment %in% prt.inh.trt) %>%
      split(.[id.var]) %>%
      map(., function(b) {
        lm(t.half ~ .,
           data = b %>% dplyr::select(-degron,-colony,-treatment)) %>%
          broom::glance()
      }) %>%
      bind_rows(.id = id.var) %>%
      mutate(removed.col = a)
    
  }) %>%
    bind_rows()
  
  #get the fits
  stepR.fit <- lapply(reg, function(a) {
    temp.df2 <- df %>%
      dplyr::select(all_of(reg), degron, t.half,
                    # dy, 
                    colony, treatment) %>% #removed t.half and replaced with dy
      dplyr::select(-a)
    
    temp.df2 %>%
      filter(treatment %in% prt.inh.trt) %>%
      split(.[id.var]) %>%
      map(., function(b) {
        lm(t.half ~ .,
           data = b %>% dplyr::select(-degron, -colony,-treatment))
      })
  })
  
  #output
  stepR.list <- list(stepR.est,
                     stepR.glance,
                     stepR.fit)
  
  names(stepR.list) <- c("stepR.est",
                         "stepR.glance",
                         "stepR.fit")
  return(stepR.list)
}
```

function to evaluate the full model
```{r}
fullMLR.fn <- function(regressors, df, prt.inh.trt, id.var) {
  #statistics
  fullR.glance <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron, 
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony, -treatment)) %>%
        broom::glance()
    }) %>%
    bind_rows(.id = id.var) %>%
    mutate(removed.col = "none")
  
  #estimates
  fullR.est <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron,
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony,-treatment)) %>%
        broom::tidy()
    }) %>%
    bind_rows(.id = id.var) %>%
    mutate(removed.col = "none")
  
  #fits
  fullMLR.fit <- df %>%
    ungroup() %>%
    filter(treatment %in% prt.inh.trt) %>%
    dplyr::select(all_of(regressors), 
                  degron, 
                  t.half,
                  # dy, 
                  colony, 
                  treatment) %>% #removed t.half and replaced with dy
    split(.[id.var]) %>%
    map(., function(b) {
      lm(t.half ~ .,
         data = b %>% dplyr::select(-degron, -colony, -treatment))
    })
  
  #output
  fullMLR.list <- list(fullR.glance,
                       fullR.est,
                       fullMLR.fit)
  
  names(fullMLR.list) <- c("fullR.glance",
                           "fullR.est",
                           "fullMLR.fit")
  return(fullMLR.list)
}
```

#df where the two experiments of cln2 and stable-GFP are clubbed 
```{r}
mlr.df2 <- mlr.df %>% 
  filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "yeGFP-CLN2",
                            degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
                            degron %in% c("stable","stable.2","stable.3") ~ "yeGFP")) 
  
```

step MLR estimates 
#without the protein inhibition
```{r}
stepR.list <- stepR.fn(reg = regressors, 
         df = mlr.df2,
         prt.inh.trt = "none",
        id.var = "degron")

fullR.list <- fullMLR.fn(reg = regressors,
                         df = mlr.df2,
                         prt.inh.trt = "none",
                         id.var = "degron")
```


#2D correlation plots
```{r}
lapply(regressors[1], function(a){
  mlr.df %>% 
    filter(treatment != "none") %>% 
  mutate(t.half = log(2)/dy) %>%
    # filter(degron %in% c("stable.2","stable.3")) %>% 
    # filter(!(degron %in% c("mODC","cln2.2","cln2","stable"))) %>% 
    # filter(treatment %in% c("dmso1","dmso2")) %>%  %>% 
      ggplot(.,aes(y = t.half, x = .[[a]]))+
    geom_point()+
    stat_cor()+
    # stat_regline_equation()+
    facet_wrap(~treatment , scales = "free")+
    labs(title = a)+
    scale_x_log10()
  
})

```

#y~x + c single variable regression
single regressor statistics
```{r}
stepR.1var <- lapply(regressors, function(a){
  temp.df2 <- mlr.df %>% 
    ungroup() %>% 
    dplyr::select(a, t.half,degron)
  
  temp.df2 %>% 
    split(.$degron) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron)) %>% 
        broom::glance()
    }) %>% 
    bind_rows(.id = "degron") %>% 
    mutate(single.col = a)
   
}) %>% 
  bind_rows()
```

single regressor estimates 
```{r}
stepR.1var.est <- lapply(regressors, function(a){
  temp.df2 <- temp.df %>% 
    ungroup() %>% 
    dplyr::select(a, t.half,degron)
  
  temp.df2 %>% 
    split(.$degron) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron)) %>% 
        broom::tidy()
    }) %>% 
    bind_rows(.id = "degron") %>% 
    mutate(single.col = a)
   
}) %>% 
  bind_rows()

#fits
stepR.1var.fit <- lapply(regressors, function(a){
  temp.df2 <- mlr.df %>% 
    ungroup() %>% 
    dplyr::select(a, t.half,degron)
  
  temp.df2 %>% 
    split(.$degron) %>% 
    map(.,function(b){
      lm(t.half ~ ., data = b %>% dplyr::select(-degron)) 
    }) 
}) 
```

#testing for one degron
```{r}
cln2.3.df <- temp.df %>% 
  filter(degron == "cln2.3") %>% 
  ungroup() %>% 
  select(-dy,-degron)

lm(t.half ~ gfp.mean.bg.af.sub.new 
   + rfp.sum.bg.sub.puncta 
   + area 
   - no.of.voxels 
   - dapi.sum.bg.sub.puncta 
   + spheracity
   + shape
   + volume
     , data = cln2.3.df ) %>% 
  summary()
```

#plotting the difference in r2 (full model - dropout model)
```{r}
deltaR2.plt.all <- bind_rows(fullR.list$fullR.glance, stepR.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   # filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  dplyr::select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("Rate of Maturation" = "dm", 
         "GFP" = "gfp.mean.bg.af.sub.new",
         "Area" = "area",
         "Pup1-tDimer2" = "rfp.mean.bg.sub.puncta",
         "DAPI" = "dapi.sum.bg.sub.puncta",
         "Shape" = "shape") %>% 
  pivot_longer(cols = 3:8) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  # filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  mutate(degron = factor(degron , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10))+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8), 
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 30))

deltaR2.plt.all
```

#transform dm and dapi intensity
```{r}
stepR.list.trans <- stepR.fn(reg = regressors, 
         df = mlr.df2 %>% 
           mutate(dm = log10(dm)),
         prt.inh.trt = "none",
        id.var = "degron")

fullR.list.trans <- fullMLR.fn(reg = regressors,
                         df = mlr.df2 %>% 
           mutate(dm = log10(dm)),
                         prt.inh.trt = "none",
                         id.var = "degron")
```

```{r}
bind_rows(fullR.list.trans$fullR.glance, stepR.list.trans$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("Rate of Maturation" = "dm", 
         "GFP intensity" = "gfp.mean.bg.af.sub.new",
         "Area" = "area",
         "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
         "Total DAPI Int." = "dapi.sum.bg.sub.puncta",
         "Cellular Shape" = "shape") %>% 
  pivot_longer(cols = 3:8) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  # filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  mutate(degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  theme_pubr()+
  theme(text = element_text(size = 8), 
        axis.text.x = element_text(angle = 30))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme(text = element_text(size = 8))


```

#look at the sign of the estimates 
```{r}
fullR.list.trans$fullR.est %>% split(.$degron)

lm(t.half ~ rfp.mean.bg.sub.puncta + dm + gfp.mean.bg.af.sub.new + dapi.sum.bg.sub.puncta, data = mlr.df2 %>% filter(degron == "GFP-mODC"))
```

#residual analysis of the models
```{r}
#residual plots against the fitted values
fullMLR.fit %>% 
  map(.,autoplot)


#residual plots against the regressors
fullMLR.fit %>% 
  map(.,function(a){
    plt.df <- a$model %>% 
      mutate(dm = log10(dm),
        res = a$residuals) %>% 
      pivot_longer(cols = 2:7)
    
    plt.df %>% 
      ggplot(.,aes(x = value, y = res))+
      geom_point()+
      geom_hline(yintercept = 0, color = "red4")+
      facet_wrap(~name, scales = "free")+
      xlab("Regressor")+
      ylab("Residuals")
      
  })


```
Observations: 
from the residual vs fitted plots: 
For the degron GFPs the residual vs the fitted plot lies around the horizonttal band around 0. 
but for stable GFP, the residual plot is an open funnel shape. 
According to page 139 in the intro to linear regression: 
The patterns in panels b and c indicate that the variance of the errors is not constant. The outward-opening funnel pattern in panel b implies that the variance is an increasing function of y. 
From the residuals vs regressors plots:
in yeGFP: there is a strong funnel shape with the GFP intensity: residuals is negatively related to the GFP intensity. 

#t.half vs dm 
```{r}
mlr.df %>% 
  ggplot(.,aes(x = dm, y = dy))+
  geom_point()+
  facet_wrap(~degron, scales = "free")+
  scale_x_log10()
```

testing multiple colinearity using the full model 
method = variance inflation factors. vif > 5 for a regressor is problematic
```{r}
library(car)

fullR.list$fullMLR.fit %>% map(.,vif)

```

#only the gfp, rfp, dapi regrrssors 
```{r}
regressors.fp <- c("gfp.mean.bg.af.sub.new",
                   "rfp.mean.bg.sub.puncta",
                   "dapi.sum.bg.sub.puncta")

stepR.fp.list <- stepR.fn(reg = regressors.fp, 
                          df = mlr.df2,
                          prt.inh.trt = "none",
                          id.var = "degron")

fullR.fp.list <- fullMLR.fn(regressors = regressors.fp, 
                          df = mlr.df2,
                          prt.inh.trt = "none",
                          id.var = "degron")
```

#variance inflation factor
```{r}
vif(full.fp.fit$cln2.3) 
vif(full.fp.fit$cln2.4) 
vif(full.fp.fit$mODC.2) 
vif(full.fp.fit$stable.3)
vif(full.fp.fit$stable.2) 
```

#delta r2 for only the fluorescent proteins as the regressors
```{r}
deltaR2.plt <- bind_rows(fullR.fp.list$fullR.glance, stepR.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   # filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  dplyr::select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("GFP intensity" = "gfp.mean.bg.af.sub.new",
         "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
         "Total DAPI Int." = "dapi.sum.bg.sub.puncta") %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  # filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  mutate(degron = factor(degron , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))

deltaR2.plt
```
#response == dy instead of t.half 
```{r}
bind_rows(fullR.fp.list$fullR.glance, stepR.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  # separate(.,degron, into = c("degron","colony"), sep = ".Replicate ") %>%  
   filter(p.value < 0.05) %>% 
  # group_by(degron, removed.col) %>% 
  select(degron, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  rename("GFP intensity" = "gfp.mean.bg.af.sub.new",
         "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
         "Total DAPI Int." = "dapi.sum.bg.sub.puncta") %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
   filter(degron %in% c("cln2.3","mODC.2","stable.3","stable.2","cln2.4")) %>% 
  # mutate(degron = case_when(degron %in% c("cln2.3","cln2.2","cln2","cln2.4") ~ "GFP-CLN2",
  #                           degron %in% c("mODC.2", "mODC")  ~ "GFP-mODC",
  #                           degron %in% c("stable","stable.2","stable.3") ~ "yeGFP"),
  #        degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>%
  # mutate(degron = factor(degron , levels = c("GFP-mODC","GFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col(width = 0.2)+
  facet_wrap(~degron, scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))
```


#With the protein inhibitors
```{r}
#with 6 regressors
stepR.prtInh.list <- stepR.fn(reg = regressors, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")
fullR.prtInh.list <- fullMLR.fn(reg = regressors, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")

#with just fluorecent proteins
stepR.prtInh.fp.list <- stepR.fn(reg = regressors.fp, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")
fullR.prtInh.fp.list <- fullMLR.fn(reg = regressors.fp, 
                              df = mlr.df, 
                              prt.inh.trt = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM"), 
                              id.var = "treatment")

```

plots
```{r}
#with 6 regressors
bind_rows(fullR.prtInh.list$fullR.glance, stepR.prtInh.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  select(treatment, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  pivot_longer(cols = 3:8) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col()+
  facet_wrap(~factor(treatment, levels = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM")), scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("regressor dropped")

#with 3 fluorescent proteins
bind_rows(fullR.prtInh.fp.list$fullR.glance, stepR.prtInh.fp.list$stepR.glance) %>%
    mutate(adj.r.squared = round(adj.r.squared , digits = 4)) %>% 
  select(treatment, r.squared, removed.col) %>% 
  pivot_wider(values_from = r.squared, names_from = removed.col) %>% 
  pivot_longer(cols = 3:5) %>% 
  mutate(value.ratio = value/ none,
         value.sub = none - value) %>% 
  ggplot(.,aes(y = name, x = value.sub))+
  geom_col()+
  facet_wrap(~factor(treatment, levels = c("dmso1","dmso2","1uM","2.5uM","5uM","50uM")), scales = "free")+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(r^{2}_{full} - r^{2}_{step})"))+
  ylab("regressor dropped")
```

#looked at the outliers
```{r}
install.packages("qpcR")
library(qpcR)

PRESS(fullMLR.fit)
PRESS(stepR.fp.fit[[2]]$cln2.3)
PRESS(stepR.fp.fit[[3]]$cln2.3)

library(ggfortify)
autoplot(fullMLR.fit$cln2.3)
autoplot(stepR.fp.fit[[1]]$cln2.3)

fullMLR.fit$cln2.3$model

plot(stepR.fp.fit[[2]]$cln2.3)
cooks.distance(fullMLR.fit$cln2.3) %>% broom::tidy() %>% arrange(desc(x))

mlr.df %>% filter(degron == "cln2.3") %>% 
  rownames_to_column() %>% 
  filter(rowname == "916")

lapply(fullMLR.fit, autoplot)

lapply(fullMLR.fit, function(a){
  cooks.distance(a) %>% 
    broom::tidy() %>% 
    arrange(desc(x))
}) 

```

```{r}
cooks.distance(fullMLR.fit$cln2.3) %>% broom::tidy() %>% arrange(desc(x))

outliers.cln2.3 <- mlr.df %>% filter(degron == "cln2.3") %>% 
  rownames_to_column() %>% 
  filter(rowname %in% c( "178","564","761","916")) %>% pull(cell.id)
```

```{r}
lapply(fullMLR.fit, summary)
```


#df to be used with step function
```{r}
temp.df.list <- mlr.df2 %>% 
    filter(treatment == "none") %>%  
  # mutate( dm = log10(dm)) %>% 
  split(.$degron)
```


#using stepAIC from MASS
```{r}
library(MASS)

full.stepAIC.mass <- lapply(temp.df.list, function(a){
  n <- nrow(a)
  lm.obj <- lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors,
                   t.half,
                   -dy, 
                   -treatment))
     
 stepAIC(object = lm.obj, 
     direction = "backward", 
     trace = T)
})

full.stepAIC.mass %>% map(.,broom::tidy)

```

#step AIC with all the fp markers + cell shape 
```{r}
full.stepAIC <- lapply(temp.df.list, function(a){
  n <- nrow(a)
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", 
     trace = T,
     # k = log(n),
     k =2 ) #k = 2 when small dataset and k = log(n) where n = no. of observations for when the dataset is large
})


full.stepAIC %>% 
  map(., broom::glance) %>% 
  bind_rows(.id = "sample") 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3"))

full.stepAIC %>% 
  map(., broom::tidy) 
  # bind_rows(.id = "sample") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3"), 
         # p.value < 0.05) %>% 

full.stepAIC %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) 


full.stepAIC.fp %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) 

```

#Plot the aic difference at each step
```{r}
full.stepAIC %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC[1]-AIC) %>% 
  select(degron, Step, del.aic) %>% 
  split(.$degron) %>% 
  map(.,function(a){
    a[nrow(a),] %>% 
      mutate(model = "reduced")
  }) %>% 
  bind_rows(.id = "degron") %>% 
  mutate(degron = factor(degron, levels = c("GFP-mODC", "GFP-CLN2","yeGFP"))) %>% 
  ggplot(.,aes(x = del.aic, y = degron))+
  geom_col(width = 0.2)+
  theme(text = element_text(size = 8))+
  # scale_x_log10()+
  xlab(TeX("$Delta(AIC_{full} - AIC_{step})"))+
  ylab("Regressor Dropped")+
  theme_pubr()+
  theme(text = element_text(size = 8))

```

#step AIC with only the fluorescent markers 
```{r}
full.stepAIC.fp <- lapply(temp.df.list, function(a){
  n <- nrow(a)
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", 
     trace = T,
     k = 2)
})

#looking at which regressor contributes to the overall variability 
full.stepAIC.fp %>% 
  map(., broom::tidy) 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  # split(.$sample)

#model fit stats
full.stepAIC.fp %>% 
  map(., broom::glance) %>% 
  bind_rows(.id = "sample")


full.stepAIC.fp %>% 
  map(.,function(a){
    a$anova 
  }) %>% 
  bind_rows(.,.id = "degron") %>% 
  mutate(Step = as.character(Step), 
         Step = ifelse(Step == "", "Full",Step)) %>% 
  group_by(degron) %>% 
  mutate(del.aic = AIC-AIC[1]) 
```

Variance in t-half explained by each regressor
using anova

```{r}
full.stepAIC.fp %>% 
  map(., anova %>% as.data.frame) %>% 
  bind_rows(.id = "sample") %>% 
  rownames_to_column(var = "predictor") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  group_by(sample) %>% 
  mutate(var.exp = (value.Sum.Sq/sum(value.Sum.Sq))*100) %>% 
  split(.$sample)

var.explained <- full.stepAIC %>% 
  map(., anova %>% as.data.frame) %>% 
  bind_rows(.id = "sample") %>% 
  rownames_to_column(var = "predictor") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  group_by(sample) %>% 
  mutate(var.exp = (value.Sum.Sq/sum(value.Sum.Sq))*100, 
         predictor = str_split(predictor, pattern = "\\.\\.\\.", simplify = T)[,1], 
         predictor = case_when(predictor == "gfp.mean.bg.af.sub.new" ~ "GFP" ,
                               predictor == "area" ~ "Area",
                               predictor == "rfp.mean.bg.sub.puncta" ~ "Pup1",
                               predictor == "dapi.sum.bg.sub.puncta" ~ "DNA" ,
                               predictor == "shape" ~ "Shape", 
                               predictor == "dm" ~ "Rate of Maturation"
                               , 
                               TRUE~predictor)) %>% 
  split(.$sample)

var.explained
```

#plotting the variance explained 
```{r}
remain.reg <- data.frame(sample = c("yeGFP-CLN2","yeGFP-CLN2","yeGFP","yeGFP"), 
                         predictor = c("Rate of Maturation", "Shape","Shape","Pup1"), 
                         var.exp = rep(0,4),
                         value.Pr..F. = rep(0,4))
var.exp.plt <- var.explained %>% 
  bind_rows() %>% 
  bind_rows(.,remain.reg) %>% 
  filter(predictor != "Residuals") %>% 
  dplyr::select(sample, predictor, var.exp,value.Pr..F.) %>% 
  mutate(sample = factor(sample , levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP"))) %>% 
  # mutate(var.pal = ifelse(value.Pr..F. < 0.05, "<0.05",">0.05")) %>% 
  arrange(sample) %>% 
  ggplot(.,aes(x = predictor, y = var.exp ))+
  geom_col(width = 0.5)+
  facet_wrap(~sample, scales = "free")+
  scale_x_discrete(labels = function(x) str_wrap(x, width = 5))+
  theme_pubr()+
  theme(text = element_text(size = 8),
        axis.line = element_line(size = 0.1),
        strip.background = element_blank())+
  ylim(0,20)+
  ylab("Variance explained (%)")+
  xlab("Cellular features")
var.exp.plt
```

#stepAIC with proteasome inhibition experiments
With all fp regressors
```{r}
temp.df.list.dmso <- mlr.df %>% 
    filter(treatment != "none") %>%  
  split(.$treatment)

full.stepAIC.fp.dmso <- lapply(temp.df.list.dmso, function(a){
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", trace = T)
})

lapply(full.stepAIC.fp.dmso, summary) %>% map(., broom::tidy) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample)

lapply(full.stepAIC.fp.dmso, summary) %>% map(., broom::glance) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample)
```

with fp regressors + celluar shape 
```{r}
temp.df.list.dmso <- mlr.df %>% 
    filter(treatment != "none") %>%  
  split(.$treatment)

full.stepAIC.dmso <- lapply(temp.df.list.dmso, function(a){
 step(lm(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment)), 
     direction = "backward", trace = T)
})

lapply(full.stepAIC.dmso, summary) %>% map(., broom::tidy) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample)

lapply(full.stepAIC.dmso, summary) %>% map(., broom::glance) %>% 
  bind_rows(.id = "sample") %>% 
  split(.$sample) 

```

Variance in t-half explained by each regressor
```{r}
full.stepAIC.dmso %>% 
  map(., anova %>% as.data.frame) %>% 
  bind_rows(.id = "sample") %>% 
  rownames_to_column(var = "predictor") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  group_by(sample) %>% 
  mutate(var.exp = (value.Sum.Sq/sum(value.Sum.Sq))*100) %>% 
  split(.$sample)

full.stepAIC.fp.dmso %>% 
  map(., anova %>% as.data.frame) %>% 
  bind_rows(.id = "sample") %>% 
  rownames_to_column(var = "predictor") %>% 
  # filter(sample %in% c("mODC.2","cln2.3","cln2.4","stable.2","stable.3")) %>% 
  group_by(sample) %>% 
  mutate(var.exp = (value.Sum.Sq/sum(value.Sum.Sq))*100) %>% 
  split(.$sample)

```



#using regsubsets
```{r}
library(leaps)
```

```{r}
full.regSub.fp <- lapply(temp.df.list, function(a){
  n <- nrow(a)
 regsubsets(t.half ~ ., data = a %>% 
     dplyr::select(regressors.fp,
                   t.half,
                   -dy, 
                   -treatment), 
     nvmax = NULL)
})

regSub.summary <- full.regSub.fp %>% map(.,summary)

lapply(regSub.summary, function(a){
  data.frame(model = a$outmat, 
             cp = a$cp,
             bic = a$bic)
}) %>% bind_rows(.id = "degron") %>% 
  group_by(degron) %>% 
  arrange(degron, cp)
```

```{r}
full.regSub <- lapply(temp.df.list, function(a){
  n <- nrow(a)
  
  
 regsubsets(t.half ~ ., data = a %>% 
     dplyr::select(regressors,
                   t.half,
                   -dy, 
                   -treatment), 
     nvmax = NULL)
})

regSub.summary2 <- full.regSub %>% map(.,summary)

lapply(regSub.summary2, function(a){
  data.frame(model = a$outmat, 
             cp = a$cp,
             bic = a$bic)
}) %>% bind_rows(.id = "degron") %>% 
  group_by(degron) %>% 
  arrange(degron, bic
          ) %>% split(.$degron)
```



#plots: 
```{r}
gfp.6.cor.plt <- mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  pivot_longer(cols = 2:7) %>%
  mutate(degron = case_when(
      degron %in% c("cln2.3", "cln2.2", "cln2", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
      degron %in% c("stable", "stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(degron ~ name, scales = "free", ncol = 4) +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Half-life [min.]")+
  xlab("A.U")
  
gfp.6.cor.plt
```

```{r}
gfp.3.cor.plt <- mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors.fp,
         degron) %>%
  rename(
    "GFP intensity" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "Total DNA Int." = "dapi.sum.bg.sub.puncta"
  ) %>%
  pivot_longer(cols = 2:4) %>%
  mutate(
    degron = case_when(
      degron %in% c("cln2.3", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2")  ~ "yeGFP-mODC",
      degron %in% c("stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  ggplot(., aes(x = value, y = t.half)) +
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
  stat_cor(size = 2) +
  geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(degron ~ name, scales = "free") +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.position = "none",
            panel.grid.minor = element_blank())+
  ylab("Half-Life [min.]")+
  xlab("Intensity [A.U.]")

gfp.3.cor.plt
```
GFP intensity vs t-half cor plot 
```{r}
gfp.Int.cor.plt <- mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         gfp.mean.bg.af.sub.new,
         degron) %>%
  mutate(
    degron = case_when(
      degron %in% c("cln2.3", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2")  ~ "yeGFP-mODC",
      degron %in% c("stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  ggplot(., aes(x = gfp.mean.bg.af.sub.new, y = t.half)) +
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
  stat_cor(size = 6) +
  geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(~degron , scales = "free") +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 12),
            axis.text.x = element_text(angle = 30),
            legend.position = "none",
            panel.grid.minor = element_blank())+
  ylab("Half-Life [min.]")+
  xlab("GFP Intensity [A.U.]")

gfp.Int.cor.plt
```

#patchwork for 3 cellular features and t-half
```{r fig.height= 4, fig.width= 5 }
cellular.halfLife.plt <- (deltaR2.plt.all/gfp.3.cor.plt)+
  plot_layout(guides = "collect", heights = c(0.3,0.7))+
  plot_annotation(tag_levels = "A")

cellular.halfLife.plt
```

```{r}
ggsave(plot = cellular.halfLife.plt, filename = "cellular_halfLife_cor.png", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 8)
ggsave(plot = cellular.halfLife.plt, filename = "cellular_halfLife_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 8)
```

#patchwork for GFP intensity and t-half 
```{r  }
gfpInt.halfLife.plt <- (deltaR2.plt.all/gfp.Int.cor.plt)+
  plot_layout(guides = "collect")+
  plot_annotation(tag_levels = "A")

gfpInt.halfLife.plt
```
```{r}
ggsave(plot = gfpInt.halfLife.plt, filename = "gfpInt_halfLife_cor.png", path = "~/plots/paper1/figures/fig_2/", width = 5.5, height = 6)
ggsave(plot = gfpInt.halfLife.plt, filename = "gfpInt_halfLife_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 5.5, height = 6)
```



#for GRC poster
```{r}
ggsave(plot = var.exp.plt, filename = "var_explained.pdf",path = "~/plots/paper1/figures/fig_2/", width =12, height = 3)
ggsave(plot =gfp.Int.cor.plt , filename = "gfpInt_cor.pdf",path = "~/plots/paper1/figures/fig_2/", width =12, height = 3)
```

with prottein inhibition
```{r}
 mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c ("mODC") ) %>%
  # filter(!(treatment %in% c("none", "dmso1","50uM")), 
         # ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>%  
  dplyr::select(t.half,
         regressors,
         degron,
         treatment) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  pivot_longer(cols = 2:7) %>%
  mutate(degron = case_when(
    degron %in% c("mODC")  ~ "yeGFP-mODC"),
    treatment = factor(treatment, levels = c("none","dmso1","dmso2","1uM","2.5uM","5uM","50uM"))) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_grid(treatment ~ name, scales = "free") +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none")+
  ylab("Half-Life [min.]")+
  xlab("A.U")

```

```{r}
mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c ("mODC") ) %>%
  filter(!(treatment %in% c("none", "dmso1","50uM")), 
         ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  dplyr::select(t.half,
         regressors,
         degron,
         treatment) %>% 
  ggplot(.,aes(x = area, y = rfp.mean.bg.sub.puncta))+
  geom_point()+
  geom_smooth(method = "lm", se = F)+
  facet_wrap(~treatment)+
  stat_cor()
```


```{r}
mlr.df %>% 
  filter(treatment == "none") %>% 
  ggplot(.,aes(x = dapi.sum.bg.sub.puncta))+
  geom_histogram()+
  facet_wrap(~degron, scales = "free")
```
#ggpairs 

```{r}
#to be used with ggpairs
my_fn <- function(data, mapping, ...){
  p <- ggplot(data = data, mapping = mapping) + 
    ggpointdensity::geom_pointdensity(size = 0.1)+
    # geom_point(size = 0.1, alpha = 0.1) + 
    geom_smooth(method="lm",  color="red4", se = FALSE, lwd = 0.5)+
    theme(legend.position = "none")
  p
}

gfp.area.cor.ggPair <- mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  dplyr::select(Area, DNA, `Pup1-tDimer`,GFP,degron ) %>% 
  mutate(
    degron = case_when(
      degron %in% c("cln2.3", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2")  ~ "yeGFP-mODC",
      degron %in% c("stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  split(.$degron) %>% 
  map(.,function(a){
    a %>% 
      ggpairs(.,
          # legend = 5,
          columns = 1:4,
          # mapping = ggplot2::aes(color = Replicate),
          lower = list(continuous = my_fn,
                      discrete = "blank", 
                      combo="blank"), 
          # upper = "blank",
          # diag = NULL,
           diag = list(discrete="barDiag",
                      continuous = wrap("densityDiag", alpha=0.5 )),
          upper = list(discrete= wrap("barDiag" , outlier.size = 0.5),
                       combo = wrap("box_no_facet", outlier.size = 0.5),
                       continuous = wrap("cor", display_grid = FALSE, size = 2.5 , method = "pearson", color = "red4")),
          labeller = label_wrap_gen(width = 2, multi_line = TRUE))+
  theme_bw()+
  theme(text = element_text(size = 8),
        legend.position = "none",
        panel.grid.major = element_blank(),
        axis.text.x = element_text(angle = 30, hjust = 1))+
      labs(title = a$degron[1])
  })
  
gfp.area.cor.ggPair
```
```{r}
ggsave(plot = gfp.area.cor.ggPair$`yeGFP-mODC`, filename = "gfp_mODC_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-mODC`, filename = "gfp_mODC_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-CLN2`, filename = "gfp_cln2_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP-CLN2`, filename = "gfp_cln2_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP`, filename = "gfp_4Cor_ggP.png", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)

ggsave(plot = gfp.area.cor.ggPair$`yeGFP`, filename = "gfp_4Cor_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 4, height = 3.5)
```

```{r}
test.ggP <- (wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$`yeGFP-mODC`))|wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$`yeGFP-CLN2`))| wrap_elements(ggmatrix_gtable(gfp.area.cor.ggPair$yeGFP)))+
 plot_annotation(tag_levels = "A")
```

```{r}
ggsave(plot = test.ggP, filename = "test_ggP.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 3)
```

#area vs pup1 vs dna vs gfp
```{r}
df_corSup_fig <- mlr.df %>%
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  dplyr::select(Area, DNA, `Pup1-tDimer`,GFP,degron ) %>% 
  mutate(
    degron = case_when(
      degron %in% c("cln2.3", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2")  ~ "yeGFP-mODC",
      degron %in% c("stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) 
```

```{r}

area.pup1 <- df_corSup_fig %>% 
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = Area, y = `Pup1-tDimer`))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Pup1-tDimer [A.U]")+
  xlab("Area [A.U]")

dna.pup1 <- df_corSup_fig %>% 
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = DNA, y = `Pup1-tDimer`))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Pup1-tDimer [A.U]")+
  xlab("DNA [A.U]")

dna.area <- df_corSup_fig %>%
  filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = DNA, y = Area))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  ylab("Area [A.U]")+
  xlab("DNA [A.U]")
```

```{r}
gfpInt.cor.plt <- df_corSup_fig %>% 
  pivot_longer(cols = 1:3) %>% 
  # filter(degron == "yeGFP-mODC") %>% 
  ggplot(.,aes(x = value, y = GFP))+
  ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 3) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
  facet_wrap(degron~name, scales = "free")+
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none", 
            axis.line = element_line(size = 0.1))+
  xlab("A.U")
```

```{r}
test_cor <- ((area.pup1|dna.pup1|dna.area)/gfpInt.cor.plt)+
  plot_layout(heights = c(0.2, 0.8))+
  plot_annotation(tag_levels = "A")

ggsave(plot = test_cor, filename = "area.pup1.dna.gfp_cor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 6.5, height = 7)
```


#Partial correlations
ppcor
```{r}
#partial correlations
ppCor.list <- mlr.df2 %>% 
  filter(treatment == "none") %>% split(.$degron) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area,  dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
    ppcor::pcor(a)
  })

#correlations
cor.all <- mlr.df2 %>% 
  filter(treatment == "none") %>%  
  split(.$degron) %>% 
  map(.,function(a){
     a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta,rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
     cor(a)
  })

```

```{r}
ppCor.df <- ppCor.list %>% 
  map(.,function(a){
    a$estimate %>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor) %>% 
      left_join(.,a$p.value%>% 
                  as.data.frame() %>% 
                  rownames_to_column(var = "regressor") %>% 
                  rename("p.value" = "t.half") %>% 
      dplyr::select(p.value, regressor), by = "regressor")
  }) %>% 
  bind_rows(.id = "degron")

single.core.df <- cor.all %>% 
  map(.,function(a){
    a%>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor)
  }) %>% 
  bind_rows(.id = "degron") %>% 
  rename("single.core"="t.half")

```

```{r}
library(gtools)
```

```{r}
ppCoor.fig <- ppCor.df %>% 
  left_join(.,single.core.df, by = c("degron","regressor")) %>% 
  rename("Partial-Pariwise"="t.half",
         "Standard-Pariwise" = "single.core") %>% 
  pivot_longer(cols = c("Partial-Pariwise","Standard-Pariwise")) %>% 
   mutate(regressor = case_when(regressor == "gfp.mean.bg.af.sub.new" ~ "GFP" ,
                               regressor == "area" ~ "Area",
                               regressor == "rfp.mean.bg.sub.puncta" ~ "Pup1",
                               regressor == "dapi.sum.bg.sub.puncta" ~ "DNA" ,
                               regressor == "t.half" ~ "half-life"), 
          degron = factor(degron, levels = c("yeGFP-mODC","yeGFP-CLN2","yeGFP")), 
          name = factor(name , levels = c("Standard-Pariwise", "Partial-Pariwise")), 
          sig = stars.pval(p.value), 
          sig = ifelse(sig == " ", "NS", sig)) %>% 
  # filter(p.value < 0.05) %>% 
  filter(regressor != "half-life") %>% 
  ggplot(.,aes(x = regressor , y = value, fill = name))+
  geom_col(width = 0.5, position = "dodge")+
  facet_wrap(~degron)+
  geom_hline(yintercept = 0, size = 0.1)+
  scale_y_continuous(breaks = seq(-0.5,0.5, by = 0.1))+
  theme_pubr()+
  theme(text = element_text(size = 8),
            legend.key.size = unit(4,"mm"),
        legend.position = "top", 
        axis.line = element_line(size = 0.1),
        strip.background = element_blank())+
  scale_fill_manual(values = c("lightsteelblue4" ," steelblue4"), name = NULL)+
  ylab("Correlation with Half-life ")+
  xlab("Cellular features")

ppCoor.fig
```
#patchwork for 4 cellular features (area, gfp, dapi, pup1) and t-half 

```{r fig.width= 5, fig.height= 6}
gfp.4cor.hf.ppCor.plt <- (gfp.6.cor.plt/var.exp.plt/ppCoor.fig)+
  plot_layout( heights = c(0.6,0.2, 0.2))+
  plot_annotation(tag_levels = "A")

gfp.4cor.hf.ppCor.plt
```

```{r}
ggsave(plot = gfp.4cor.hf.ppCor.plt, filename = "gfp_4cor_halfLife_ppcor.png", path = "~/plots/paper1/figures/fig_2/", width = 7, height = 9)

ggsave(plot = gfp.4cor.hf.ppCor.plt, filename = "gfp_4cor_halfLife_ppcor.pdf", path = "~/plots/paper1/figures/fig_2/", width = 7, height = 9)
```

#with protein inhibion
```{r}
ppCor.PrtIn.list <- mlr.df %>% 
    filter(treatment != "none") %>%  
  filter(ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  split(.$treatment) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>%
      na.omit()
    ppcor::pcor(a)
  })

cor.PrtIn.all <- mlr.df %>% 
    filter(treatment != "none") %>%  
  filter(ifelse(treatment == "5uM", t.half < 138.6294, t.half == t.half)) %>% 
  split(.$treatment) %>% 
  map(.,function(a){
    a <- a %>% 
      dplyr::select(t.half, area, dapi.sum.bg.sub.puncta, rfp.mean.bg.sub.puncta, gfp.mean.bg.af.sub.new) %>% 
      na.omit()
    cor(a)
  })

```

```{r}
ppCor.PrtIn.df <- ppCor.PrtIn.list %>% 
  map(.,function(a){
    a$estimate %>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor) %>% 
      left_join(.,a$p.value%>% 
                  as.data.frame() %>% 
                  rownames_to_column(var = "regressor") %>% 
                  rename("p.value" = "t.half") %>% 
      dplyr::select(p.value, regressor), by = "regressor")
  }) %>% 
  bind_rows(.id = "treatment")

single.core.Prtn.df <- cor.PrtIn.all %>% 
  map(.,function(a){
    a%>% 
      as.data.frame() %>% 
      rownames_to_column(var = "regressor") %>% 
      dplyr::select(t.half, regressor)
  }) %>% 
  bind_rows(.id = "treatment") %>% 
  rename("single.core"="t.half")
```
correlation of pup1 with mODC with treated with proteasome inhibitors
```{r}
ppcor.fig.df <- ppCor.PrtIn.df %>% 
  left_join(.,single.core.Prtn.df, by = c("treatment","regressor")) %>% 
  filter(!(treatment %in% c("dmso1","50uM"))) %>% 
  rename("Partial-Pariwise"="t.half",
         "Standard-Pariwise" = "single.core") %>% 
  pivot_longer(cols = c("Partial-Pariwise","Standard-Pariwise")) %>% 
   mutate(regressor = case_when(regressor == "gfp.mean.bg.af.sub.new" ~ "GFP" ,
                               regressor == "area" ~ "Area",
                               regressor == "rfp.mean.bg.sub.puncta" ~ "Pup1",
                               regressor == "dapi.sum.bg.sub.puncta" ~ "DNA" ,
                               regressor == "t.half" ~ "half-life", 
                               TRUE ~ regressor), 
          treatment = factor(treatment, levels = c("dmso1","50uM","dmso2","1uM","2.5uM","5uM")), 
          name = factor(name , levels = c("Standard-Pariwise", "Partial-Pariwise")), 
          sig = stars.pval(p.value), 
          sig = ifelse(sig == "\ ", "N.S", sig))
          # new.value = ifelse((name == "Partial-Pariwise" & p.value > 0.05), 0, value)) %>% 
  # filter(p.value < 0.05) %>% 
ppcor.fig.df %>% 
  filter(!(regressor %in% c("half-life", "shape")), 
         regressor == "Pup1", 
         treatment %in% c("dmso2","1uM","2.5uM","5uM")) %>% 
  ggplot(.,aes(x = treatment , y =value, fill = name))+
  geom_col(width = 0.5, position = "dodge")+
  geom_text(data = ppcor.fig.df %>% 
              filter(!(regressor %in% c("half-life", "shape")), 
         regressor == "Pup1", 
         treatment %in% c("dmso2","1uM","2.5uM","5uM")) , 
         aes(label = sig), nudge_x = -0.1)+
  # facet_wrap(~treatment, nrow = 1)+
  scale_y_continuous(breaks = seq(-0.5,0.5, by = 0.1))+
  theme_pubr()+
  theme(text = element_text(size = 8),
            legend.key.size = unit(4,"mm"),
        legend.position = "top")+
  scale_fill_manual(values = c("lightsteelblue4" ," steelblue4"), name = NULL)+
  geom_hline(yintercept = 0)+
  ylab("Correlation of Pup1 with Half-life ")+
  xlab("Protein inhibitor treatment (MG132)")
```

#when you normalize pup1 with the area
```{r}
mlr.df %>%
  mutate(dm = log10(dm)) %>% 
  filter(degron %in% c("mODC.2", "cln2.3", "cln2.4", "stable.2", "stable.3")) %>%
  dplyr::select(t.half,
         regressors,
         degron) %>%
  rename(
    "GFP" = "gfp.mean.bg.af.sub.new",
    "Pup1-tDimer" = "rfp.mean.bg.sub.puncta",
    "DNA" = "dapi.sum.bg.sub.puncta",
    "Area" = "area"
  ) %>%
  mutate(norm.pup1 = `Pup1-tDimer`/Area) %>% 
  pivot_longer(cols = c(2:7,9)) %>%
  mutate(degron = case_when(
      degron %in% c("cln2.3", "cln2.2", "cln2", "cln2.4") ~ "yeGFP-CLN2",
      degron %in% c("mODC.2", "mODC")  ~ "yeGFP-mODC",
      degron %in% c("stable", "stable.2", "stable.3") ~ "yeGFP"
    ),
    degron = factor(degron , levels = c("yeGFP-mODC", "yeGFP-CLN2", "yeGFP"))
  ) %>%
  filter(!(name %in% c("shape","dm"))) %>% 
  ggplot(., aes(x = value, y = t.half)) +
      ggpointdensity::geom_pointdensity(size = 0.2, guides = F) +
      stat_cor(size = 2) +
      geom_smooth(method = "lm", se = FALSE, color = "red4", size = 0.5)+
      facet_wrap(degron ~ name, scales = "free", ncol = 4) +
      theme_bw() +
      labs(x = NULL) +
      theme(text = element_text(size = 8),
            axis.text.x = element_text(angle = 30),
            legend.key.size = unit(2,"mm"), 
            legend.position = "none")+
  ylab("Half-Life [min.]")+
  xlab("A.U")
```

